package com.example.getbook;

import cn.hutool.core.io.file.FileReader;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeFilter;
import org.junit.jupiter.api.Test;
import org.thymeleaf.util.StringUtils;

import java.io.File;


/**
 * Utility test that turns an HTML document into SQL {@code comment on} statements.
 *
 * <p>The HTML file is read from a hard-coded local path (judging by its file name, a
 * database dictionary export), parsed with jsoup, and the generated statements are
 * printed to standard output.
 */
public class DataBaseZiDian {

    /**
     * Reads the HTML file and prints one {@code comment on table} statement per
     * {@code MAIN_TABLE} element, followed by one {@code comment on column} statement
     * for every row (after the first) of the first {@code SIMPLE_TABLE} found among the
     * following siblings, provided the row has a non-empty remark.
     *
     * <p>Elements that lack the expected structure (no title, no description node, no
     * column table, or a row without the expected cells) are skipped instead of
     * aborting the whole run with an exception.
     */
    @Test
    public void readHtml() {
        String filePath = "C:\\Users\\Administrator\\Desktop\\自助建站数据库字典.html";

        FileReader fileReader = FileReader.create(new File(filePath));
        String html = fileReader.readString();

        Document document = Jsoup.parse(html);

        for (Element mainTable : document.getElementsByClass("MAIN_TABLE")) {
            Element mainTitle = mainTable.getElementsByClass("MAIN_TITLE").first();
            if (mainTitle == null) {
                // Without a title there is no table name to attach comments to.
                continue;
            }
            String tableName = mainTitle.text().replace("Table ", "").trim();

            // The table description is taken from the third child node of the first
            // DESC_TEXT element (presumably the text after a label - confirm against
            // the export format).
            Element description = mainTable.getElementsByClass("DESC_TEXT").first();
            if (description != null && description.childNodeSize() > 2) {
                String tableZhName = replaceSingleQuotes(description.childNode(2).outerHtml().trim());
                System.out.println("comment on table " + tableName + " is '" + tableZhName + "';");
            }

            Element simpleTable = mainTable.nextElementSiblings().select(".SIMPLE_TABLE").first();
            if (simpleTable == null) {
                continue;
            }

            // Row 0 is skipped (presumably the header row).
            Elements trs = simpleTable.select("tr");
            for (int j = 1; j < trs.size(); j++) {
                Element column = trs.get(j);
                Element nameCell = column.select(".LIST_ITEM").first();
                Element remarkCell = column.select(".DESC_TEXT").last();
                if (nameCell == null || remarkCell == null) {
                    // Row does not carry both a column name and a remark cell.
                    continue;
                }

                String columnName = nameCell.text().trim();
                String remark = remarkCell.text().trim();
                if (!StringUtils.isEmpty(remark)) {
                    remark = replaceSingleQuotes(remark);
                    System.out.println("comment on column " + tableName + "." + columnName + " is '" + remark + "';");
                }
            }
        }
    }

    /**
     * Replaces single quotes with double quotes so the text cannot terminate the
     * single-quoted SQL literal it is embedded in.
     *
     * @param text the comment text to sanitise
     * @return {@code text} with every {@code '} replaced by {@code "}
     */
    private static String replaceSingleQuotes(String text) {
        return text.replace("'", "\"");
    }
}
